package com.dmcb.trade.business.crawlers.author;

import com.dmcb.common.business.services.BaseService;
import com.dmcb.common.business.utils.DateUtil;
import com.dmcb.common.web.conversion.JsonResult;
import com.dmcb.common.business.utils.WebClientUtil;
import com.dmcb.trade.controller.ArticleCrawlerController;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

/**
 * Bulk importer for articles published on the ifanr website.
 *
 * <p>Walks the paginated list view of every configured section and hands each article link to
 * {@link ArticleCrawlerController}.
 *
 * Created by wangyong on 2017/8/18.
 */
@Service
public class AiFanErCrawler extends BaseService {
    private static final Logger logger = LoggerFactory.getLogger(AiFanErCrawler.class);

    /** Prefix shared by every list URL, e.g. http://www.ifanr.com/category/product?page=2 */
    private static final String BASE_URL = "http://www.ifanr.com/";

    /** Site sections that are walked, in this order. */
    private static final String[] SECTIONS = {
        "coolbuy", "ifanrnews", "product", "intelligentcar", "review", "business", "people", "minapp"
    };

    /**
     * First argument passed to {@code articleCrawlerController.crawl}.
     * NOTE(review): presumably the id of the account the articles are imported under — confirm
     * against ArticleCrawlerController.
     */
    private static final int CRAWL_TARGET_ID = 4740;

    @Autowired
    private WebClientUtil webClientUtil;
    @Autowired
    private ArticleCrawlerController articleCrawlerController;

    /**
     * Crawls every section page by page and imports each listed article.
     *
     * <p>A section is abandoned when a list page cannot be fetched, when it contains no entries,
     * or after a page on which every single import failed.
     *
     * @return a success result whose message reports the number of articles seen and the number
     *     imported successfully
     */
    public JsonResult crawl() {
        int allNum = 0;  // total number of articles seen
        int success = 0; // number of successful imports

        for (String section : SECTIONS) {
            String path = sectionPath(section);
            // Paging state belongs to one section only. The page counter used to live outside
            // this loop, so every section after the first started on the page the previous
            // section had stopped at.
            int page = 1;
            String postId = "";
            boolean stop = false;

            while (!stop) {
                // e.g. http://www.ifanr.com/category/product?page=3&pajax=1&post_id__lt=894145
                String src = BASE_URL + path + "?page=" + page + "&pajax=1&post_id__lt=" + postId
                        + "&show_type=list";
                Document doc = webClientUtil.getDoc(src);
                // Pause between requests; the unit of the argument is defined by DateUtil.sleep.
                DateUtil.sleep(10);
                if (doc == null) {
                    logger.error("该连接获取不到数据：{}", src);
                    break;
                }
                Elements elements = doc.select("div.list-posts>div");
                if (elements == null || elements.isEmpty()) {
                    logger.error("该连接获取不到数据：{}", src);
                    break;
                }

                int failed = 0;
                // The page is processed from its last entry to its first.
                for (int i = elements.size() - 1; i >= 0; i--) {
                    Element element = elements.get(i);
                    if (i == 0) {
                        // Cursor for the next request, taken from the first entry of this page.
                        // NOTE(review): the original comment called this "the request id of the
                        // last article" — confirm the first list entry is the one post_id__lt
                        // expects.
                        postId = element.select("div.article-meta").attr("data-post-id");
                    }
                    allNum++;
                    String link = element.select("a.article-link").attr("href");
                    JsonResult result = articleCrawlerController.crawl(CRAWL_TARGET_ID, link);
                    // A missing result counts as a failed import instead of aborting the whole
                    // run with a NullPointerException.
                    if (result != null && result.getStatus() == JsonResult.STATUS_SUCCESS) {
                        success++;
                    } else {
                        failed++;
                    }
                }
                // Stop this section once a whole page failed to import.
                stop = failed >= elements.size();
                page++;
            }
        }
        return success("文章总数：" + allNum + "导入成功数：" + success);
    }

    /** Maps a section name to its URL path; only "coolbuy" and "minapp" are not under "category/". */
    private static String sectionPath(String section) {
        if ("coolbuy".equals(section) || "minapp".equals(section)) {
            return section;
        }
        return "category/" + section;
    }
}
